******************************************************************************************************************************************* fragment
*************************************** SUMMARIZING CLAIMS DATA 
******************************************************************************************************************************************* fragment
clear all 
set more off
set matsize 11000 
cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles\"

********** create temp files per each of the claims data 

use "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles\MedClaims_MultCh2014.dta", clear

gen year=2014
 
*********** NEW VARIABLES FOR RANDOM FOREST ***********************************************

gen inpatient    = (adj_hccc_cd=="06")
gen outpatient   = (adj_hccc_cd=="07")
gen specialty    = (adj_hccc_cd=="02")
gen other_claims = 1 - inpatient - outpatient - specialty

gen dollar_claims_inpatient     = inpatient  * allw_amt
gen dollar_claims_outpatient    = outpatient * allw_amt
gen dollar_claims_specialty     = specialty  * allw_amt
gen dollar_claims_other_claims  = other_claims  * allw_amt

bys mbr_sys_id year cust_seg_nbr: egen number_inpatient     = sum(inpatient)
bys mbr_sys_id year cust_seg_nbr: egen number_outpatient    = sum(outpatient)
bys mbr_sys_id year cust_seg_nbr: egen number_specialty     = sum(specialty)
bys mbr_sys_id year cust_seg_nbr: egen number_other_claims  = sum(other_claims)

********************************************************************************************

gen oop_dircomputed = coins_amt + copay_amt + ded_amt
gen opp_indircomputed = allw_amt - net_pd_amt 

forvalues i = 1/5 {
  gen codeAMI`i' = substr(diag_`i'_cd,1,4)
  gen codeDIABETES`i' = substr(diag_`i'_cd,1,3)
  local icd "410 4101 4102 4103 4104 4105 4108 4109"
     foreach x of local icd {
     gen dummyAMI_`i'_`x' = (codeAMI`i' =="`x'")
     }
  gen dummyDIABETES_`i' = (codeDIABETES`i' =="250")
  egen dummyAMI_`i'      = rowtotal(dummyAMI_`i'_*)
}

gen code_ami = 1 if dummyAMI_1 ==1 | dummyAMI_2==1 | dummyAMI_3==1 |dummyAMI_4==1 | dummyAMI_5==1   
replace code_ami=0 if code_ami==.
gen code_diabetes = 1 if dummyDIABETES_1 ==1 | dummyDIABETES_2==1 | dummyDIABETES_3==1 | dummyDIABETES_4==1 | dummyDIABETES_5==1   
replace code_diabetes=0  if code_diabetes==.

gen mem_died = (dschrg_sts_cd=="20")

gen cd2_d1 = substr(diag_1_cd,1,2)
gen cd3_d1 = substr(diag_1_cd,1,3)
gen cd4_d1 = substr(diag_1_cd,1,4)
gen cd5_d1 = substr(diag_1_cd,1,5)
gen cd2_d2 = substr(diag_2_cd,1,2)
gen cd3_d2 = substr(diag_2_cd,1,3)
gen cd4_d2 = substr(diag_2_cd,1,4)
gen cd5_d2 = substr(diag_2_cd,1,5)

cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles\"

sort cd3_d1
merge cd3_d1 using Cancer3Digits.dta 
drop _merge
sort cd4_d1
merge cd4_d1 using Cancer4Digits.dta 
drop _merge
sort cd5_d1
merge cd5_d1 using Cancer5Digits.dta 
drop _merge

egen code_cancer2 = rowmax(cancer3digits cancer4digits cancer5digits)
replace code_cancer2=0 if code_cancer2!=1

gen code_leukemia = 1 if cd3_d1=="205" | cd3_d2=="205"
replace  code_leukemia=0 if  code_leukemia==.

gen code_septicemia = 1 if cd4_d1=="0389" | cd4_d2=="0389"
replace  code_septicemia=0 if  code_septicemia==.

gen code_coronaryatheros = 1 if cd3_d1=="414" | cd3_d2=="414"
replace  code_coronaryatheros=0 if  code_coronaryatheros==.

gen code_heartvalve = 1 if cd3_d1=="424" | cd3_d2=="424"
replace  code_heartvalve=0 if  code_heartvalve==.

gen code_brainhemorr = 1 if cd3_d1=="430" | cd3_d2=="430"
replace  code_brainhemorr=0 if  code_brainhemorr==.

gen code_respfailure = 1 if cd5_d1=="51881" | cd5_d2=="51881"
replace  code_respfailure=0 if  code_respfailure==.

gen code_sepshock = 1 if cd5_d1=="78552" | cd5_d2=="78552"
replace  code_sepshock=0 if  code_sepshock==.

gen code_respfainewborn = 1 if cd5_d1=="77084" | cd5_d2=="77084"
replace  code_respfainewborn=0 if  code_respfainewborn==.

gen code_respdistrnewborn = 1 if cd3_d1=="769" | cd3_d2=="769"
replace  code_respdistrnewborn=0 if  code_respdistrnewborn==.

gen code_kidneyfail = 1 if cd3_d1=="584" | cd3_d2=="584"
replace  code_kidneyfail=0 if  code_kidneyfail==.

gen code_respfaisurg = 1 if cd5_d1=="51851" | cd5_d2=="51851"
replace  code_respfaisurg=0 if  code_respfaisurg==.

gen code_neumonia = 1 if cd3_d1=="486" | cd3_d2=="486"
replace  code_neumonia=0 if  code_neumonia==.

gen code_brainedema = 1 if cd4_d1=="3485" | cd4_d2=="3485"
replace  code_brainedema=0 if  code_brainedema==.

gen code_pancytopenia = 1 if cd5_d1=="28411" | cd5_d2=="28411"
replace  code_pancytopenia=0 if  code_pancytopenia==.

gen code_posthemorranemia = 1 if cd4_d1=="2851" | cd4_d2=="2851"
replace  code_posthemorranemia=0 if  code_posthemorranemia==.

gen code_traubraininj = 1 if cd3_d1=="854" | cd3_d2=="854"
replace  code_traubraininj=0 if  code_traubraininj==.

gen code_transplant = 1 if cd3_d1=="V42" | cd3_d2=="V42"
replace  code_transplant=0 if  code_transplant==.

*new codes
gen code_asthma = 1 if cd3_d1=="493" | cd3_d2=="493"
replace  code_asthma=0 if  code_asthma==.

gen code_atrialfibrilization = 1 if cd3_d1=="427" | cd3_d2=="427"
replace  code_atrialfibrilization=0 if  code_atrialfibrilization==.

gen code_chronickidneydisease = 1 if cd3_d1=="585" | cd3_d2=="585"
replace  code_chronickidneydisease=0 if  code_chronickidneydisease==.

gen code_heartfailure = 1 if cd3_d1=="428" | cd3_d2=="428"
replace  code_heartfailure=0 if  code_heartfailure==.

gen code_hypertension = 1 if cd3_d1=="401" | cd3_d2=="401" |cd3_d1=="402" | cd3_d2=="402" | cd3_d1=="403" | cd3_d2=="403" | cd3_d1=="404" | cd3_d2=="404" | cd3_d1=="405" | cd3_d2=="405"
replace  code_hypertension=0 if  code_hypertension==.


*fixing cancer
gen code_cancer = 1 if cd2_d1=="14" | cd2_d1=="15" | cd2_d1=="16" | cd2_d1=="17" | cd2_d1=="18" | cd2_d1=="19" | cd2_d1=="20" 
*| cd2_d1=="21" | cd2_d1=="22" | cd2_d1=="23" 
replace  code_cancer=0 if  code_cancer==.

gen code_cancer_b = 1 if cd2_d1=="14" | cd2_d1=="15" | cd2_d1=="16" | cd2_d1=="17" | cd2_d1=="18" | cd2_d1=="19" | cd2_d1=="20" | cd2_d2=="14" | cd2_d2=="15" | cd2_d2=="16" | cd2_d2=="17" | cd2_d2=="18" | cd2_d2=="19" | cd2_d2=="20"
*| cd2_d1=="21" | cd2_d1=="22" | cd2_d1=="23" 
replace  code_cancer_b=0 if  code_cancer_b==.


collapse (max) number_inpatient number_outpatient number_specialty number_other_claims code_cancer_b code_hypertension code_heartfailure code_chronickidneydisease code_atrialfibrilization code_asthma code_cancer code_cancer2 code_traubraininj code_transplant code_leukemia code_septicemia code_coronaryatheros code_heartvalve code_brainhemorr code_respfailure code_sepshock code_respfainewborn code_respdistrnewborn code_kidneyfail code_respfaisurg code_neumonia code_brainedema code_pancytopenia code_posthemorranemia mem_died code_ami code_diabetes (sum) dollar_claims_inpatient dollar_claims_outpatient dollar_claims_specialty dollar_claims_other_claims oop_dircomputed net_pd_amt allw_amt (firstnm) rel_cd  , by(mbr_sys_id year cust_seg_nbr )
tempfile aux_claims
collapse (max) number_inpatient number_outpatient number_specialty number_other_claims code_cancer_b code_hypertension code_heartfailure code_chronickidneydisease code_atrialfibrilization code_asthma code_cancer code_cancer2 code_traubraininj code_transplant code_leukemia code_septicemia code_coronaryatheros code_heartvalve code_brainhemorr code_respfailure code_sepshock code_respfainewborn code_respdistrnewborn code_kidneyfail code_respfaisurg code_neumonia code_brainedema code_pancytopenia code_posthemorranemia mem_died code_ami code_diabetes (sum) dollar_claims_inpatient dollar_claims_outpatient dollar_claims_specialty dollar_claims_other_claims oop_dircomputed net_pd_amt allw_amt (firstnm) rel_cd  , by(mbr_sys_id year cust_seg_nbr )
gen customer_number = substr( cust_seg_nbr ,3,7)
sort year mbr_sys_id customer_number
rename customer_number polnbr
cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles\"
save total_claims_2014_RandomForest.dta, replace 
************************************************************************************************* 
************************************************************************************************* 




************************************************************************************************* 
************************************************************************************************* 
**** PHARMACY CLAIMS DATABASE  
************************************************************************************************* 
clear all
set more off
set matsize 11000
cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles\"
********** create temp files per each of the claims data 
use "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles\MultCh_Pharm_2014.dta", clear
gen year=2014

sort ndc 
merge ndc using NDCtoCLASSES.dta
drop if _merge==2
egen drugs_classes_drugs = group(drug_class)

forvalues i =1/133{
gen aux1 = 1 if drugs_classes_drugs==`i'
replace aux1=0 if aux==.
gen aux2 = pd_amt if drugs_classes_drugs==`i'
replace aux2=0 if aux2==.
bys mbr_sys_id year cust_seg_nbr: egen drugclass_number_`i' = sum(aux1) 
bys mbr_sys_id year cust_seg_nbr: egen drugclass_dollar_`i' = sum(aux2) 
drop aux1 aux2
}

********************************
egen code_drugs = group(ndc)
bys mbr_sys_id year cust_seg_nbr: egen num_drugs = count(code_drugs)
********************************

collapse (max) drugclass_dollar_* drugclass_number_* num_drugs (sum) copay_amt ded_amt pd_amt sbmt_chrg_amt , by(mbr_sys_id year cust_seg_nbr )
gen customer_number = substr( cust_seg_nbr ,3,7)
drop cust_seg_nbr
rename customer_number polnbr
sort year mbr_sys_id polnbr
cd "C:\Users\sfleit2\Documents\ReclassificationRisk_DoFiles\"
order year mbr_sys_id polnbr sbmt_chrg_amt copay_amt ded_amt pd_amt drugclass_dollar_* drugclass_number_*
save total_pharmaclaims_2014_RandomForest.dta, replace 
*************************************************************************************************


